#delimit;
* From here on every command ends with a semicolon instead of a line break;
* Start with no data in memory;
clear;
* Do not pause output with the more prompt;
set more off;

* Globals temp and path are defined here. The visible part of this script
  spells its file paths out literally and does not reference them;
global temp /Sastemp;
global path ~;
* Request 5000 MB of memory for data, a setting required by older Stata releases;
set mem 5000m;

/*================================================
 Program: smoking_rates.do
 Author:  Avi Ebenstein
 Created: August 2008
 Purpose: Examine the smoking rates by province
=================================================*/

* Rural person file: the ID joins a1 and b101 with a hyphen;
use ~/data/china/china1995/ruralp;
egen serial = concat(a1 b101), punct("-");

* Replace the questionnaire item codes with descriptive names;
rename b103 relate;
rename b104 sex;
rename b105 age;
rename b106 marst;
rename b109 minority;
rename b110 educ;

* Years of schooling from the education code.
  Codes outside 1 to 7, including missing codes, give a missing value;
gen yrsed = cond(educ==1, 16,
            cond(educ==2, 14,
            cond(educ==3, 12,
            cond(educ==4, 10,
            cond(educ==5,  8,
            cond(educ==6,  6,
            cond(educ==7,  3, .)))))));

* Smoking indicator: 1 when b112 equals 1 and 0 otherwise,
  so a missing b112 is also coded 0;
gen smoker = (b112==1);
gen cigsperday = b112a;

* The income file is merged onto this one by serial, so store it sorted on serial;
sort serial;
save /Sastemp/ruraltemp, replace;

* Rural income file: rebuild the same ID and attach the person records;
use ~/data/china/china1995/rincome;
egen serial = concat(a1 b101), punct("-");
sort serial;
merge serial using /Sastemp/ruraltemp;
tabulate _merge;
* Retain only records present in both files;
drop if _merge != 3;

* Flags named head and wife mark relate codes 1 and 2;
gen head = 0;
replace head = 1 if relate == 1;
gen wife = 0;
replace wife = 1 if relate == 2;

* Spread the schooling and age of the flagged member to every record sharing
  a serial. The product is 0 for unflagged records and missing products are
  ignored, so the maximum is the flagged value whenever that value is not negative.
  NOTE(review): confirm that serial identifies the intended group here;
egen hyrsed = max(head*yrsed), by(serial);
egen myrsed = max(wife*yrsed), by(serial);
egen hage   = max(head*age),   by(serial);
egen mage   = max(wife*age),   by(serial);

* Natural log of ry. Values of ry at or below zero give a missing value;
gen lninc = ln(ry);
save /Sastemp/ruraldata, replace;

* Urban person file: the ID is a copy of n1;

use ~/data/china/china1995/urbanp;
gen serial = n1;

* Replace the questionnaire item codes with descriptive names;
rename a3 relate;
rename a4 sex;
rename a5 age;
rename a6 marst;
rename a9 minority;
rename a11 educ;

* Years of schooling from the education code.
  Codes outside 1 to 7, including missing codes, give a missing value;
gen yrsed = cond(educ==1, 16,
            cond(educ==2, 14,
            cond(educ==3, 12,
            cond(educ==4, 10,
            cond(educ==5,  8,
            cond(educ==6,  6,
            cond(educ==7,  3, .)))))));

* Indicator mao is 1 when a19 equals 1 and 0 otherwise;
gen mao = 0;
replace mao = 1 if a19 == 1;

* Smoking indicator: 1 when a20 equals 1 and 0 otherwise,
  so a missing a20 is also coded 0;
gen smoker = (a20==1);
gen cigsperday = a21;

* The income file is merged onto this one by serial, so store it sorted on serial;
sort serial;
save /Sastemp/urbantemp, replace;

* Urban income file: rebuild the same ID and attach the person records;
use ~/data/china/china1995/uincome;
gen serial = n1;
sort serial;
merge serial using /Sastemp/urbantemp;
tabulate _merge;
* Retain only records present in both files;
drop if _merge != 3;

* Flags named head and wife mark relate codes 1 and 2;
gen head = 0;
replace head = 1 if relate == 1;
gen wife = 0;
replace wife = 1 if relate == 2;

* Spread the schooling and age of the flagged member to every record sharing
  a serial. The product is 0 for unflagged records and missing products are
  ignored, so the maximum is the flagged value whenever that value is not negative;
egen hyrsed = max(head*yrsed), by(serial);
egen myrsed = max(wife*yrsed), by(serial);
egen hage   = max(head*age),   by(serial);
egen mage   = max(wife*age),   by(serial);

* Natural log of uy. Values of uy at or below zero give a missing value;
gen lninc = ln(uy);
save /Sastemp/urbandata, replace;

* Pool the rural and urban samples and compute smoking shares by province and sex;
use /Sastemp/ruraldata;
gen urban=0;
* NOTE(review): serial is a string in the rural file (built with egen concat)
  and takes the type of n1 in the urban file. If n1 is numeric, newer Stata
  releases stop on this string and numeric mismatch during append -- confirm;
append using /Sastemp/urbandata;
* Appended urban records have no value for urban yet, so flag them as 1;
replace urban=1 if urban==.;

* Restrict to ages 30 and over. Stata ranks missing values above every number,
  so the comparison age>=30 alone would also keep records with missing age.
  The second condition excludes them so the shares match their 30+ labels;
keep if age>=30 & age<.;

* Mean of the 0/1 smoker flag is the share of smokers in each province and sex cell;
collapse (mean) smoker (mean) cigsperday, by(province sex);
keep smoker province sex;

* One row per province with the share for sex code 1 and sex code 2 side by side;
reshape wide smoker , i(province) j(sex);
rename smoker1 msmokershare;
rename smoker2 fsmokershare;
label var msmokershare "Fraction of males 30+ who smoke";
label var fsmokershare "Fraction of females 30+ who smoke";

* This file is later the using side of a merge on province,
  which requires it to be sorted on that key;
sort province;
save ~/pollution/datafiles/smoking_rates, replace;

* Attach the province smoking shares to the dsp_basins file;
use ~/pollution/datafiles/dsp_basins, clear;

* The existing province variable is kept under the name provname,
  and the merge key province is a copy of provgb;
rename province provname;
gen province = provgb;

* Merge on province and retain only records present in both files;
sort province;
merge province using ~/pollution/datafiles/smoking_rates;
drop if _merge != 3;

save ~/pollution/datafiles/smoking_data, replace;
